package com.whoami.idmagic.tacimei

import org.apache.hadoop.io.NullWritable
import org.apache.orc.mapred.OrcStruct
import org.apache.orc.mapreduce.OrcInputFormat
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Spark driver that prepares two keyed datasets: a text file of IMEI values
  * and ORC ad-monitor log rows keyed by their `md5_imei` field.
  *
  * Only lazy RDD transformations are set up here; no Spark action is invoked,
  * so running this job does not currently read or write any data.
  *
  * @author tzp
  * @since 2019/8/28
  */
object imeiJoin {

  /**
    * Entry point: creates the SparkContext, declares the two keyed RDDs and
    * always stops the context before returning.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("topn")
    val sc = new SparkContext(conf)

    try {
      // One IMEI value per line; each line becomes a key with an empty payload.
      val imei = sc.textFile("/user/mz_spt/test/tzp/com.whoami.idmagic.xunji/imeigenerator/e1")
      // NOTE(review): imeiKV is not consumed yet - presumably the left side of
      // the join this object is named after; confirm and wire it up.
      val imeiKV = imei.map(line => (line, Nil))

      val log = sc.newAPIHadoopFile(
        "/user/mz_spt/datalake/src/admonitor/ods_adm_bus/20201212/CAMPAIGN*",
        classOf[OrcInputFormat[OrcStruct]],
        classOf[NullWritable],
        classOf[OrcStruct]
      )

      // Key every ORC row by its md5_imei value. A null column value is mapped
      // to "" (instead of throwing a NullPointerException on toString) so the
      // empty-key filter below drops it together with genuinely empty keys.
      // NOTE(review): Hadoop record readers may reuse the same OrcStruct
      // instance across records; copy the needed fields before caching or
      // shuffling these rows - confirm before adding the join.
      val keyedLog = log
        .map { case (_, row) =>
          (Option(row.getFieldValue("md5_imei")).fold("")(_.toString), row)
        }
        .filter { case (key, _) => key.nonEmpty }

      // NOTE(review): no action (join/save/count) is applied to keyedLog or
      // imeiKV, so nothing above is actually executed by Spark.
    } finally {
      // Release cluster resources even if setting up the pipeline fails.
      sc.stop()
    }
  }

}
